“The simple graph has brought more information to the data analyst’s mind than any other device.”
— John Tukey
gg in “ggplot2” stands for Grammar of Graphics
Stanford Open Policing Project
Police Searches Drop Dramatically in States that Legalized Marijuana
# Read the state-level search data, keep only the WA and CO rows, and derive
# the two helper columns used by the plots below.
stops <- read_csv("./data/opp-search-marijuana_state.csv") %>%
  filter(state %in% c("WA", "CO")) %>%
  mutate(
    # Quarters dated on or before 2013-01-01 are labelled "pre"; later ones
    # are labelled "post".
    legalization_status = ifelse(quarter <= "2013-01-01", "pre", "post"),
    # search_rate multiplied by 100.
    search_rate_100 = search_rate * 100
  )
Exercise: Determine which variable in the dataset is mapped to which aesthetic element (x-axis, y-axis, etc.) of the plot.
class: center, middle
# Step 1: a plot object that only knows about the data (no mapping, no layer).
ggplot(data = stops)

# Step 2: add the mapping of quarter to x and search_rate_100 to y; there is
# still no geom layer.
ggplot(
  data = stops,
  mapping = aes(x = quarter, y = search_rate_100)
)

# Step 3: add a point layer on top of the mapping.
ggplot(
  data = stops,
  mapping = aes(x = quarter, y = search_rate_100)
) +
  geom_point()
# Points, with color mapped to driver_race.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point()

# Replace the points with a smoother using its default settings.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Name the smoothing method explicitly.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess")

# Same smoother with se = FALSE.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE)
# Add a discrete viridis color scale.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d()

# Add the minimal theme.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d() +
  theme_minimal()

# Add axis labels, a legend title, a plot title and a subtitle.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Year",
    y = "Search Rate",
    color = "Driver Race",
    title = "Washington Highway Patrol Searches",
    subtitle = "Searches Per Hundred stops"
  )
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))+
geom_point(mapping = aes(x = displ, y = hwy))
# Map search_rate_100 to point size as well as to the y position.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, size = search_rate_100)
) +
  geom_point()

# Same plot with alpha set to a constant 0.5 for every point.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, size = search_rate_100)
) +
  geom_point(alpha = 0.5)
Exercise: Using information from https://ggplot2.tidyverse.org/articles/ggplot2-specs.html, add color, size, alpha, and shape aesthetics to your graph. Experiment. Do different things happen when you map aesthetics to discrete and continuous variables? What happens when you use more than one aesthetic?
# Piped scatter plot colored by driver_race, with a minimal theme, a blank
# legend title, and x-axis breaks every year labelled with "%Y".
# Reminder: scale_fill for 2d objects like bars, scale_color for lines.
# Disabled alternative palette:
#   scale_color_brewer(type = "qual", palette = "Dark2")
stops %>%
  ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
  geom_point() +
  theme_minimal(base_size = 12) +
  labs(title = "Washington") +
  theme(legend.title = element_blank()) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y")
# geom level
# The aesthetic mapping is passed to geom_point() itself; ggplot() receives
# only the data.
ggplot(data = stops) +
  geom_point(mapping = aes(x = quarter, y = search_rate_100))
# geoms
# Both layers use the x/y mapping given to ggplot(); only the smoother adds
# its own color = driver_race mapping.
ggplot(data = stops, mapping = aes(x = quarter, y = search_rate_100)) +
  geom_point() +
  geom_smooth(aes(color = driver_race), method = "loess", se = FALSE)
# aes()
# color appears inside aes(), so it is mapped to the driver_race variable.
ggplot(
  data = stops,
  mapping = aes(
    x = quarter,
    y = search_rate_100,
    color = driver_race
  )
) +
  geom_point()
# aes()
# color is given to geom_point() outside aes(), as the constant "red".
ggplot(
  data = stops,
  mapping = aes(
    x = quarter,
    y = search_rate_100
  )
) +
  geom_point(color = "red")
# Same scatter plot, with the constant point color given as a hex code.
ggplot(
  data = stops,
  mapping = aes(x = quarter, y = search_rate_100)
) +
  geom_point(color = "#63B3E8")
# Pipe form: stops is piped into ggplot().
stops %>%
  ggplot(aes(x = quarter, y = search_rate_100)) +
  geom_point()

# Positional form: data and mapping are passed without argument names.
ggplot(stops, aes(x = quarter, y = search_rate_100)) +
  geom_point()
# Keep the scatter plot in p so that more layers can be added afterwards.
p <- ggplot(stops, aes(x = quarter, y = search_rate_100)) +
  geom_point()

# Add a default smoother to the stored plot.
p + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# NOTE(review): "blue" sits inside aes(), so it is treated as a mapped
# constant value rather than as a literal point color. This looks like a
# deliberate demonstration of that pitfall -- confirm before changing.
ggplot(data = stops) +
  geom_point(
    aes(x = quarter, y = search_rate_100, color = "blue")
  )
Exercise: What is wrong with the following?
# Exercise snippet: reproduced as given so the question above still applies.
stops %>%
  ggplot(
    aes(x = quarter, y = search_rate_100, color = legalization_status)
  ) %>%
  geom_point()
What is wrong with the following?
# Same exercise snippet, kept as given, followed by the error it produces.
stops %>%
  ggplot(
    aes(x = quarter, y = search_rate_100, color = legalization_status)
  ) %>%
  geom_point()
## Error: `mapping` must be created by `aes()`
## Did you use %>% instead of +?
# Points only.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_point()

# Points plus a line layer.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_point() +
  geom_line()

# Points plus lines, with color mapped to driver_race.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  geom_line()
# Smoother per driver_race with span = 0.2 and se = FALSE.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(span = 0.2, se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# First layer: large gray points for the rows with search_rate_100 < .2.
# Second layer: every row as a regular point colored by driver_race.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(
    data = filter(stops, search_rate_100 < .2),
    size = 5,
    color = "gray"
  ) +
  geom_point()
Exercise: Work with your neighbor to sketch what the following plots will look like. No cheating! Do not run the code, just think through the code for the time being.
# Rows dated before 2013-01-01 whose search_rate_100 is above 1.0.
pre_legalization_high <- stops %>%
  filter(quarter < "2013-01-01" & search_rate_100 > 1.0)

# Gray highlight points for that subset, then all points colored by
# driver_race, then small black text labels showing search_rate_100 for the
# subset.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = pre_legalization_high,
    aes(y = search_rate_100, label = search_rate_100),
    size = 2,
    color = "black"
  )
# Base scatter plot colored by driver_race.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point()

# Gray highlight layer added after the regular points.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  geom_point(data = pre_legalization_high, size = 5, color = "gray")

# Gray highlight layer added before the regular points.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point()
# Text labels placed at the y value of each highlighted point.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = pre_legalization_high,
    aes(y = search_rate_100, label = search_rate_100),
    size = 2,
    color = "black"
  )

# Same labels with y increased by .05.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = pre_legalization_high,
    aes(y = search_rate_100 + .05, label = search_rate_100),
    size = 2,
    color = "black"
  )
# Label the highlighted points with their quarter (as text) using
# geom_text_repel().
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_text_repel(
    data = pre_legalization_high,
    aes(
      x = quarter,
      y = search_rate_100,
      label = as.character(quarter)
    ),
    size = 3,
    color = "black"
  )

# Same labels drawn with geom_label_repel().
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point(data = pre_legalization_high, size = 5, color = "gray") +
  geom_point() +
  geom_label_repel(
    data = pre_legalization_high,
    aes(
      x = quarter,
      y = search_rate_100,
      label = as.character(quarter)
    ),
    size = 3,
    color = "black"
  )
Exercise: How would you fix the following plot?
# Exercise plot, reproduced as given: color is mapped to driver_race in
# ggplot() and also set to the constant "blue" in geom_smooth().
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_smooth(color = "blue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Smoother per driver_race, drawn with three hand-picked hex colors.
ggplot(
  stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  scale_color_manual(
    values = c("#FF6EB4", "#00BFFF", "#008B8B")
  ) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# facet_wrap() on state ~ driver_race.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_line() +
  facet_wrap(state ~ driver_race)

# facet_grid() with the same formula, state ~ driver_race.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_line() +
  facet_grid(state ~ driver_race)

# facet_grid() with the formula reversed, driver_race ~ state.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100)
) +
  geom_line() +
  facet_grid(driver_race ~ state)
# Reversed y scale.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  scale_y_reverse()

# Square-root y scale.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  scale_y_sqrt()

# Continuous y scale with explicitly chosen breaks.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  scale_y_continuous(
    breaks = c(0, 0.25, 0.5, .75, 1.0)
  )
# theme_bw().
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  theme_bw()

# theme_dark().
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  theme_dark()

# Default theme with the x-axis text set to an angle of 90.
ggplot(
  data = stops,
  aes(x = quarter, y = search_rate_100, color = driver_race)
) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))
# Smoothed search rate per driver_race for the state == "WA" rows, stored as
# a plot object titled "Washington".
wa_stops <- stops %>%
  filter(state == "WA") %>%
  ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
  geom_smooth(se = FALSE) +
  labs(title = "Washington")

# Same plot for the state == "CO" rows, titled "Colorado", with the legend
# position set to "none".
co_stops <- stops %>%
  filter(state == "CO") %>%
  ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
  geom_smooth(se = FALSE) +
  labs(title = "Colorado") +
  theme(legend.position = "none")
# Combine the two stored plots with `+`.
# NOTE(review): adding one ggplot object to another presumably relies on the
# patchwork package being attached -- confirm it is loaded earlier.
wa_stops + co_stops
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Combine the same two plots with `/`.
wa_stops / co_stops
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Convert the stored WA plot with plotly::ggplotly().
plotly::ggplotly(wa_stops)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Final Exercise:
Starter code:
# Starter plot for the final exercise: state == "WA" rows as points colored by
# driver_race, with an lm smoother per group and se = FALSE.
stops %>%
  filter(state == "WA") %>%
  ggplot(aes(x = quarter, y = search_rate_100, color = driver_race)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)
‘?labs’ layer controls title, subtitle, caption, etc.
‘?scale_color_manual’ layer allows you to assign your own colors to the levels of the mapped variable
‘?geom_vline’ layer draws a vertical line across the plot. (hint: the x-axis is a date data type)
‘?theme’ controls the non-data elements of the plot like size of text, angle of axis ticks, etc.
‘?annotate’ creates a text annotation layer. Same trick with coordinates as geom_vline
Experiment with themes
To really master themes:
ggplot2.tidyverse.org/articles/extending-ggplot2.html#creating-your-own-theme
class: center, middle
Make any plot by filling in the parameters of this template
# Include the image file ./img/ggplot2-template.png in the rendered output.
knitr::include_graphics("./img/ggplot2-template.png")